library(tidyverse)
library(ggrastr)
library(ggrepel)
library(patchwork)
library(modelsummary)
# Make kableExtra the default table backend used by modelsummary.
modelsummary::config_modelsummary(factory_default = "kableExtra")

# Restore the saved plotting objects into the current environment.
# NOTE(review): `url_df` and `domains`, used throughout this script, are never
# created here, so they are presumably supplied by this file -- confirm.
load(file = "data/plotting_data.RData")

# Aggregate URL-level rows to one row per domain.
#
# Arguments:
#   url_df           URL-level data; must contain `domain`, `sub_sig_left`,
#                    `sub_sig_right` and the column named by `count_var`.
#   domain_df        Domain-level data joined on by `domain`.
#   count_var        Name of the interaction-count column in `url_df`
#                    (callers in this script pass it as a string).
#   domain_score_var Unused here; kept so the signature mirrors
#                    fb_interact_bubble_plot().
#   label            Unused here; kept for the same reason.
#   threshold        Minimum number of `url_df` rows a domain needs to be kept.
#
# Returns one row per retained domain with:
#   m_bubble       share of the domain's rows where
#                  `sub_sig_left + sub_sig_right > 0`
#                  (presumably URLs flagged as distinct from their domain on
#                  either side -- confirm against how `sub_sig_*` is built),
#   total_actions  sum of the `count_var` column,
#   total_urls     number of rows for the domain,
# followed by the columns of `domain_df`.
fb_interact_bubble_data <- function(url_df, domain_df, count_var, domain_score_var, label, threshold) {
    # Collapse to domain level first; the count column is renamed to a fixed
    # name so the summary below does not depend on which interaction is used.
    per_domain <- url_df |>
        rename(action = {{ count_var }}) |>
        group_by(domain) |>
        summarise(
            m_bubble = mean((sub_sig_left + sub_sig_right) > 0),
            total_actions = sum(action),
            total_urls = n()
        )

    # Attach domain-level covariates, then drop sparsely covered domains.
    per_domain |>
        left_join(domain_df, by = "domain") |>
        filter(total_urls >= threshold)
}

    
# Bubble plot of domain-level results for one interaction type.
#
# Aggregates `url_df` with fb_interact_bubble_data() and plots, per domain,
# the column named by `domain_score_var` (x axis) against `m_bubble` (y axis).
# Point size encodes `total_actions`, a smoother is overlaid, and a fixed set
# of domains is labelled in red.
#
# Arguments:
#   url_df, domain_df, count_var, threshold
#                    Forwarded unchanged to fb_interact_bubble_data().
#   domain_score_var Name of the domain score column used for the x axis
#                    (callers in this script pass it as a string).
#   label            Interaction name; title-cased for the plot title and the
#                    size legend.
#
# Returns the ggplot object.
fb_interact_bubble_plot <- function(url_df, domain_df, count_var, domain_score_var, label, threshold) {
    # Only these domains receive a text label; all other points get NA.
    highlighted_domains <- c(
        "mediaite.com",
        "wsj.com",
        "foxnews.com",
        "buzzfeednews.com",
        "nypost.com",
        "dailymail.co.uk",
        "hollywoodreporter.com",
        "realclearpolitics.com",
        "redstate.com",
        "politifact.com",
        "nytimes.com",
        "huffpost.com",
        "usatoday.com",
        "rollcall.com",
        "cbsnews.com",
        "reason.com",
        "axios.com",
        "inc.com",
        "washingtontimes.com",
        "rt.com",
        "nationalreview.com",
        "dailykos.com",
        "breitbart.com"
    )

    # Domain-level data with the chosen score column under a fixed name.
    plot_df <- fb_interact_bubble_data(
        url_df, domain_df, count_var, domain_score_var, label, threshold
    ) |>
        rename(domain_score = {{ domain_score_var }})

    # Points are rasterized so the saved figure does not carry one vector
    # object per domain.
    bubble_layer <- rasterize(
        geom_point(aes(size = total_actions), alpha = .25),
        dpi = 300
    )

    domain_labels <- geom_text_repel(
        aes(label = ifelse(domain %in% highlighted_domains, domain, NA)),
        col = "red",
        min.segment.length = 0,
        size = 4
    )

    panel_theme <- theme(
        text = element_text(family = "serif", size = 12),
        plot.title = element_text(size = 16, face = "bold"),
        legend.position = "bottom",
        legend.direction = "vertical"
    )

    ggplot(plot_df, aes(x = domain_score, y = m_bubble)) +
        bubble_layer +
        scale_size_continuous(name = glue::glue("Total {stringr::str_to_title(label)}")) +
        geom_smooth(alpha = 0.5, se = FALSE) +
        scale_y_continuous(labels = scales::percent) +
        domain_labels +
        labs(
            x = "Domain Score",
            y = "Proportion URLs Substantively Distinct from Domain\n(99% Interval)",
            title = glue::glue("{stringr::str_to_title(label)}")
        ) +
        theme_classic() +
        panel_theme +
        # Fix the visible x range without dropping points from the smoother.
        coord_cartesian(xlim = c(-1, 1))
}

# One bubble plot per interaction type. Each call unnests the matching `*_ci`
# column of `url_df` and keeps only domains with at least as many rows as the
# 75th percentile of `domains$num_domain_urls`.
p_share <- url_df |>
    unnest(shares_ci) |>
    fb_interact_bubble_plot(
        domain_df = domains,
        count_var = "num_shares",
        domain_score_var = "domain_shares_score",
        label = "shares",
        threshold = quantile(domains$num_domain_urls, 0.75)
    )

p_view <- url_df |>
    unnest(views_ci) |>
    fb_interact_bubble_plot(
        domain_df = domains,
        count_var = "num_views",
        domain_score_var = "domain_views_score",
        label = "views",
        threshold = quantile(domains$num_domain_urls, 0.75)
    )

p_react <- url_df |>
    unnest(reacts_ci) |>
    fb_interact_bubble_plot(
        domain_df = domains,
        count_var = "num_reacts",
        domain_score_var = "domain_reacts_score",
        label = "reacts",
        threshold = quantile(domains$num_domain_urls, 0.75)
    )

p_click <- url_df |>
    unnest(clicks_ci) |>
    fb_interact_bubble_plot(
        domain_df = domains,
        count_var = "num_clicks",
        domain_score_var = "domain_clicks_score",
        label = "clicks",
        threshold = quantile(domains$num_domain_urls, 0.75)
    )

# 2 x 2 grid: views and clicks on the top row, shares and reacts below.
# `+` binds tighter than `&`, so theme_classic() is added with `+` first and
# only the theme() tweaks that follow are broadcast to every panel with `&`.
# NOTE(review): if theme_classic() was meant to reach every panel it would
# need `&` as well -- confirm the intended look before changing it.
full_p <- (((p_view + p_click) / (p_share + p_react)) + theme_classic()) &
    theme(
        text = element_text(family = "serif"),
        plot.title = element_text(size = 24, face = "bold"),
        panel.grid.major.y = element_line(color = "grey90"),
        panel.grid.minor.y = element_line(color = "grey96")
    )

# Save the combined figure under both output names (12 x 15 in).
# Arguments are spelled out in full: the earlier `file =` only reached
# ggsave()'s `filename` through partial argument matching, with the plot
# falling into `plot` positionally.
ggsave(filename = "results/fig_8.pdf", plot = full_p,
       height = 8 * 1.5, width = 10 * 1.5, dpi = 450)
ggsave(filename = "results/fb_interact_plot.pdf", plot = full_p,
       height = 8 * 1.5, width = 10 * 1.5, dpi = 450)

# Domain-level regression data, one frame per interaction type. The threshold
# is the 0th quantile (the minimum) of `domains$num_domain_urls`, so the
# row-count filter is at its loosest here. `domain_extremity` is the absolute
# value of the matching domain score.
d_share <- url_df |>
    unnest(shares_ci) |>
    fb_interact_bubble_data(
        domain_df = domains,
        count_var = "num_shares",
        domain_score_var = "domain_shares_score",
        label = "shares",
        threshold = quantile(domains$num_domain_urls, 0)
    ) |>
    mutate(domain_extremity = abs(domain_shares_score))

d_view <- url_df |>
    unnest(views_ci) |>
    fb_interact_bubble_data(
        domain_df = domains,
        count_var = "num_views",
        domain_score_var = "domain_views_score",
        label = "views",
        threshold = quantile(domains$num_domain_urls, 0)
    ) |>
    mutate(domain_extremity = abs(domain_views_score))

d_react <- url_df |>
    unnest(reacts_ci) |>
    fb_interact_bubble_data(
        domain_df = domains,
        count_var = "num_reacts",
        domain_score_var = "domain_reacts_score",
        label = "reacts",
        threshold = quantile(domains$num_domain_urls, 0)
    ) |>
    mutate(domain_extremity = abs(domain_reacts_score))

d_click <- url_df |>
    unnest(clicks_ci) |>
    fb_interact_bubble_data(
        domain_df = domains,
        count_var = "num_clicks",
        domain_score_var = "domain_clicks_score",
        label = "clicks",
        threshold = quantile(domains$num_domain_urls, 0)
    ) |>
    mutate(domain_extremity = abs(domain_clicks_score))

# One OLS fit per interaction type: the share of flagged URLs (`m_bubble`)
# regressed on domain extremity, the domain's URL count and its logged
# (base 10) share count. List names become the column headers of the table.
# NOTE(review): every model controls for `num_domain_shares`, including the
# view, click and react models -- confirm this is intended rather than a
# copy-paste of the shares specification.
models <- list()
models[["FB Shares"]] <- lm(
    m_bubble ~ domain_extremity + num_domain_urls + I(log10(num_domain_shares)),
    data = d_share
)
models[["FB View"]] <- lm(
    m_bubble ~ domain_extremity + num_domain_urls + I(log10(num_domain_shares)),
    data = d_view
)
models[["FB Clicks"]] <- lm(
    m_bubble ~ domain_extremity + num_domain_urls + I(log10(num_domain_shares)),
    data = d_click
)
models[["FB Reacts"]] <- lm(
    m_bubble ~ domain_extremity + num_domain_urls + I(log10(num_domain_shares)),
    data = d_react
)

# Write the regression table for `models` to a LaTeX file.
# The keys of `coef_rename` must match the fitted term names exactly. The
# models use `num_domain_urls` and `I(log10(num_domain_shares))`, so those are
# the keys used here; the earlier "n_urls" / "I(log10(n_shares))" keys matched
# no term and left those two rows with their raw names.
modelsummary::modelsummary(
    models,
    output = "results/table_h1b.tex",
    fmt = 3,
    stars = TRUE,
    coef_rename = c(
        "(Intercept)" = "Intercept",
        "domain_extremity" = "Domain Extremity",
        "num_domain_urls" = "Num. URLs",
        "I(log10(num_domain_shares))" = "Num. Shares (logged)"
    )
)


